from operator import add
from pyspark import SparkContext, SparkConf
from pyspark.streaming import StreamingContext

# Configure the application: run locally with 2 worker threads
# (one thread receives data, the other processes it).
spark_conf = SparkConf().setAppName('TestDStream').setMaster('local[2]')

sc = SparkContext(conf=spark_conf)
# Micro-batch interval of 10 seconds: the streaming computation
# is triggered once every 10 seconds.
ssc = StreamingContext(sc, 10)

# Monitor the directory for newly created text files and count words:
# split each line on spaces, map every word to (word, 1), then sum the
# counts per word within each batch.
counts = (
    ssc.textFileStream('D:/Data/Test')
       .flatMap(lambda line: line.split(' '))
       .map(lambda x : (x,1))
       .reduceByKey(add)
)
counts.pprint()  # print the first elements of each batch's result to stdout

# Start the streaming computation and block until it is stopped or fails.
ssc.start()
ssc.awaitTermination()